<!DOCTYPE html>
<html lang="en">

<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Open-LLM-VTuber</title>

    <!-- pixi live2d dependencies -->
    <!-- Load Cubism and PixiJS -->
    <!-- <script src="https://cubism.live2d.com/sdk-web/cubismcore/live2dcubismcore.min.js"></script> -->
    <script src="libs/live2dcubismcore.min.js"></script>
    <!-- <script src="https://cdn.jsdelivr.net/gh/dylanNew/live2d/webgl/Live2D/lib/live2d.min.js"></script> -->
    <script src="libs/live2d.min.js"></script>
    <!-- <script src="https://cdn.jsdelivr.net/npm/pixi.js@7.x/dist/pixi.min.js"></script> -->
    <script src="libs/pixi.min.js"></script>

    <!-- <script src="https://cdn.jsdelivr.net/gh/RaSan147/pixi-live2d-display@v0.5.0-ls-7/dist/index.min.js"></script> -->
    <script src="libs/index.min.js"></script>



    <script src="TaskQueue.js"></script>

    <!-- Voice Activation Detection -->
    <!-- <script src="https://cdn.jsdelivr.net/npm/onnxruntime-web@1.14.0/dist/ort.js"></script> -->
    <script src="libs/ort.js"></script>
    <!-- <script src="https://cdn.jsdelivr.net/npm/@ricky0123/vad-web@0.0.7/dist/bundle.min.js"></script> -->
    <script src="libs/bundle.min.js"></script>

    <link rel="stylesheet" href="index.css">
</head>

<body>
    <!-- Connection / status toolbar. The inline script at the bottom of the page wires these
         controls up by id: #wsStatus (click to connect using the URL in #wsUrl), #stateDisplay
         (shows the current conversation state), and the two dropdowns, whose options are filled
         in from lists sent by the server. -->
    <div class="top-left">
        <button type="button" id="wsStatus">Disconnected</button>
        <button type="button" id="stateDisplay">Status: loading</button>
        <!-- A placeholder alone is not an accessible name, so the field also carries an aria-label. -->
        <input type="text" id="wsUrl" placeholder="WebSocket URL" aria-label="WebSocket URL">
        <select id="configDropdown" aria-label="Configuration Selection">
            <option value="">Select Configuration</option>
        </select>
        <select id="bgDropdown" aria-label="Background Selection">
            <option value="">Select Background</option>
        </select>
    </div>

    <canvas id="canvas"></canvas>

    <div class="bottom-container">
        <!-- Subtitle / notice line. Its text is replaced by the inline script (spoken sentences,
             VAD misfire notices, configuration-switch messages), so it is exposed as a polite
             live region for assistive technology. -->
        <div class="fixed-bottom" id="message" role="status" aria-live="polite"></div>
        <div class="control-buttons">
            <button type="button" id="micToggle">🎙️Mic is On</button>
            <button type="button" id="interruptBtn">🖐️Voice Interruption On</button>
        </div>
    </div>

    <!-- <script src="./modelDict.js"></script> -->
    <script src="./live2d.js"></script>

    <script>
        // Shared conversation state used by the handlers in this script.
        //
        // `state` values documented by the original author:
        //   idle              - the LLM is not thinking or speaking and is waiting for user input
        //   thinking-speaking - the LLM is thinking or speaking
        //   interrupted       - the LLM was interrupted by the user
        // The configuration dropdown handler additionally sets "switching-config".

        // Text of the server's response within the current conversation chain;
        // playAudioLipSync() appends to it and interrupt() sends it back to the server.
        let fullResponse = "";

        // Flipped by the #interruptBtn click handler.
        let voiceInterruptionOn = true;

        // NOTE(review): not referenced anywhere else in the visible script - confirm it is still needed.
        let audioPlayer = new Audio();

        // Current conversation state; change it through setState() so the status button follows.
        let state = "idle";

        // The #stateDisplay button doubles as a read-out of the conversation state.
        const stateDisplay = document.getElementById('stateDisplay');

        // Copy the current value of `state` into the status button's label.
        function updateStateDisplay() {
            const label = `Status: ${state}`;
            stateDisplay.textContent = label;
        }

        // Store a new conversation state and refresh the on-screen label.
        // @param {string} newState - value to store in `state`
        function setState(newState) {
            state = newState;
            updateStateDisplay();
        }

        // Abort the conversation chain that is currently running.
        //
        // Local teardown (state change, stopping the model's speech, dropping queued audio) is done
        // first so that it always happens; the "interrupt-signal" message, which carries the text
        // collected so far in `fullResponse`, is only sent when the socket is actually open.
        function interrupt() {
            console.log("😡👎 Interrupting conversation chain");

            setState("interrupted");
            // model2 is defined outside this script; it may not exist yet if no model was loaded.
            if (typeof model2 !== "undefined" && model2) {
                model2.stopSpeaking();
            }
            audioTaskQueue.clearQueue();

            const payload = JSON.stringify({ type: "interrupt-signal", text: fullResponse });
            if (ws && ws.readyState === WebSocket.OPEN) {
                console.log("Sending: " + payload);
                ws.send(payload);
            } else {
                // send() throws while the socket is still connecting and is a no-op once closed.
                console.warn("Interrupt signal not sent: WebSocket is not open");
            }
            console.log("Interrupted!!!!");
        }

        // Voice activity detector instance; stays undefined until init_vad() has completed.
        let myvad;
        // Highest "isSpeech" probability seen in any processed frame so far (it is never reset).
        let previousTriggeredProbability = 0;

        // Create the microphone VAD via vad.MicVAD.new() and store it in `myvad`.
        // The callbacks tie speech detection to the conversation state:
        //   onSpeechStart    - interrupts the LLM if it is currently thinking/speaking
        //   onFrameProcessed - tracks the peak speech probability (used for logging only)
        //   onVADMisfire     - leaves the "interrupted" state again and tells the user
        //   onSpeechEnd      - pauses the mic when voice interruption is off, then uploads the audio
        async function init_vad() {
            myvad = await vad.MicVAD.new({
                preSpeechPadFrames: 10,
                onSpeechStart: () => {
                    console.log("Speech start detected: " + previousTriggeredProbability);
                    if (state === "thinking-speaking") {
                        interrupt();
                    } else {
                        console.log("😀👍 Not interrupted. Just normal conversation");
                    }
                },
                onFrameProcessed: (probs) => {
                    if (probs["isSpeech"] > previousTriggeredProbability) {
                        previousTriggeredProbability = probs["isSpeech"];
                    }
                },
                onVADMisfire: () => {
                    console.log("VAD Misfire. The LLM can't hear you.");
                    if (state === "interrupted") {
                        // Go through setState() so the status button is refreshed as well.
                        setState("idle");
                    }
                    document.getElementById("message").textContent = "The LLM can't hear you.";
                },
                onSpeechEnd: (audio) => {
                    // audio: (Float32Array of audio samples at sample rate 16000)...

                    if (!voiceInterruptionOn) {
                        stop_mic();
                    }

                    // Audio captured while the socket is not open is dropped.
                    if (ws && ws.readyState === WebSocket.OPEN) {
                        sendAudioPartition(audio);
                    }
                }
            });
        }

        // Maximum number of samples placed in one "mic-audio-data" message.
        const chunkSize = 4096;

        // Upload one utterance to the back end over `ws`: a series of "mic-audio-data" messages of
        // at most `chunkSize` samples each, followed by a single "mic-audio-end" message.
        // @param {Float32Array} audio - audio samples at sample rate 16000 (per the VAD callback)
        // NOTE(review): if `audio` really is a Float32Array, JSON.stringify emits each chunk as an
        // object keyed by index rather than as a JSON array - confirm the server expects that.
        async function sendAudioPartition(audio) {
            console.log(audio)
            const total = audio.length;
            let offset = 0;
            while (offset < total) {
                const stop = Math.min(offset + chunkSize, total);
                const payload = { type: "mic-audio-data", audio: audio.slice(offset, stop) };
                ws.send(JSON.stringify(payload));
                offset += chunkSize;
            }
            ws.send(JSON.stringify({ type: "mic-audio-end" }));
        }

        // window.addEventListener('load', init_vad);

        // WebSocket connection and cached handles to the page's controls.
        let ws;
        const wsStatus = document.getElementById('wsStatus');
        const wsUrl = document.getElementById('wsUrl');
        const interruptBtn = document.getElementById('interruptBtn');
        const micToggle = document.getElementById('micToggle');
        const configDropdown = document.getElementById('configDropdown');
        const bgDropdown = document.getElementById('bgDropdown');

        // Default endpoint, used when the page is opened straight from disk (file://).
        wsUrl.value = "ws://127.0.0.1:12393/client-ws";
        if (window.location.protocol.startsWith("http")) {
            // Served over http(s): connect back to the host that delivered the page.
            // An absolute ws:/wss: URL is built because not every browser accepts a relative
            // URL in the WebSocket constructor; wss: is used when the page itself is on https.
            console.log("Running on server");
            const wsScheme = window.location.protocol === "https:" ? "wss:" : "ws:";
            wsUrl.value = `${wsScheme}//${window.location.host}/client-ws`;
        } else { // if running on local using file://
            console.log("Running on local");
        }

        // Reset the UI and pending work after the connection has gone away.
        function resetAfterDisconnect() {
            setState("idle");
            console.log("Disconnected from WebSocket");
            wsStatus.textContent = "Disconnected";
            wsStatus.classList.remove('connected');
            // Drop audio that is still waiting to be played.
            audioTaskQueue.clearQueue();
            // `taskQueue` is not defined in this script; only clear it if another script provides it.
            if (typeof taskQueue !== "undefined" && taskQueue) {
                taskQueue.clearQueue();
            }
        }

        // Open a WebSocket to the URL currently typed into #wsUrl and install its handlers.
        // Any socket left over from an earlier call is detached and closed first, so that a
        // reconnect does not leave two live connections both feeding handleMessage().
        // Throws whatever `new WebSocket()` throws for an unusable URL.
        function connectWebSocket() {
            const previous = ws;
            if (previous) {
                const wasActive = previous.readyState !== WebSocket.CLOSED;
                // Detach the handlers so a late event from the old socket cannot touch the new state.
                previous.onopen = null;
                previous.onmessage = null;
                previous.onclose = null;
                previous.close();
                if (wasActive) {
                    resetAfterDisconnect();
                }
            }

            ws = new WebSocket(wsUrl.value);

            ws.onopen = function () {
                setState("idle");
                console.log("Connected to WebSocket");
                wsStatus.textContent = "Connected";
                wsStatus.classList.add('connected');
                fetchConfigurations();
                fetchBackgrounds();
            };

            ws.onclose = resetAfterDisconnect;

            ws.onmessage = function (event) {
                let message;
                try {
                    message = JSON.parse(event.data);
                } catch (err) {
                    // A malformed frame should not abort the handler with an uncaught exception.
                    console.error("Ignoring WebSocket message that is not valid JSON:", err);
                    return;
                }
                handleMessage(message);
            };
        }

        // Clicking the status button (re)connects.
        wsStatus.addEventListener('click', connectWebSocket);

        // Dispatch one decoded server message according to its `type` field.
        //
        // Handled types:
        //   full-text        - show message.text in the #message line
        //   control          - message.text is one of start-mic / stop-mic /
        //                      conversation-chain-start / conversation-chain-end
        //   expression       - apply a model expression
        //   mouth            - set the mouth opening to Number(message.text)
        //   audio            - queue a sentence for playback unless the chain was interrupted
        //   set-model        - initialise live2dModule and load the model in message.text
        //   listExpressions  - log the model's expression map
        //   config-files     - fill the configuration dropdown from message.files
        //   config-switched  - the server finished switching configuration
        //   background-files - fill the background dropdown from message.files
        // Anything else is reported on the console.
        //
        // @param {object} message - parsed JSON message received from the server
        function handleMessage(message) {
            console.log("Received Request: \n", message);
            switch (message.type) {
                case "full-text":
                    document.getElementById("message").textContent = message.text;
                    console.log(message);
                    console.log("full-text: ", message.text);
                    break;
                case "control":
                    switch (message.text) {
                        case "start-mic":
                            start_mic();
                            break;
                        case "stop-mic":
                            stop_mic();
                            break;
                        case "conversation-chain-start":
                            setState("thinking-speaking");
                            fullResponse = "";
                            break;
                        case "conversation-chain-end":
                            setState("idle");
                            // With voice interruption off the mic was paused after the user spoke.
                            if (!voiceInterruptionOn) {
                                start_mic();
                            }
                            break;
                        default:
                            console.warn("Unknown control command: " + message.text);
                    }
                    break;
                case "expression":
                    setExpression(message.text);
                    break;
                case "mouth":
                    setMouth(Number(message.text));
                    break;
                case "audio":
                    if (state === "interrupted") {
                        console.log("Audio playback intercepted. Sentence:", message.text);
                    } else {
                        addAudioTask(message.audio, message.volumes, message.slice_length, message.text, message.expressions);
                    }
                    break;
                case "set-model":
                    console.log("set-model: ", message.text);
                    live2dModule.init().then(() => {
                        live2dModule.loadModel(message.text);
                    });
                    break;
                case "listExpressions":
                    console.log(listSupportedExpressions());
                    break;
                case "config-files":
                    populateConfigDropdown(message.files);
                    break;
                case "config-switched":
                    console.log(message.message);
                    document.getElementById("message").textContent = "Configuration switched successfully!";
                    setState("idle");

                    // Restore the mic state recorded before the switch. The variable is only
                    // created (as an implicit global) by the dropdown handler, so it may not
                    // exist yet when this message arrives; reading it unguarded would throw.
                    if (typeof micStateBeforeConfigSwitch !== "undefined" && micStateBeforeConfigSwitch) {
                        start_mic();
                    }
                    micStateBeforeConfigSwitch = null;  // reset the state
                    break;
                case "background-files":
                    populateBgDropdown(message.files);
                    break;
                default:
                    console.error("Unknown message type: " + message.type);
                    console.log(message);
            }
        }

        // Send a "fetch-configs" request over `ws`; the "config-files" message handled in
        // handleMessage() is what fills the configuration dropdown.
        function fetchConfigurations() {
            const request = { type: "fetch-configs" };
            ws.send(JSON.stringify(request));
        }

        // Send a "fetch-backgrounds" request over `ws`; the "background-files" message handled in
        // handleMessage() is what fills the background dropdown.
        function fetchBackgrounds() {
            const request = { type: "fetch-backgrounds" };
            ws.send(JSON.stringify(request));
        }

        // Shared implementation for the two dropdowns: empty `dropdown`, add a placeholder option
        // with an empty value, then add one option per entry of `files` (value and label are both
        // the entry itself). A missing or non-array `files` leaves only the placeholder.
        function fillDropdown(dropdown, placeholderLabel, files) {
            if (!Array.isArray(files)) {
                console.warn("Expected a list of files for #" + dropdown.id + ", got:", files);
                files = [];
            }

            dropdown.innerHTML = '';

            const placeholder = document.createElement('option');
            placeholder.value = '';
            placeholder.textContent = placeholderLabel;
            dropdown.appendChild(placeholder);

            files.forEach(file => {
                const option = document.createElement('option');
                option.value = file;
                option.textContent = file;
                dropdown.appendChild(option);
            });
        }

        // Rebuild the configuration dropdown from the list sent by the server.
        // @param {string[]} files - configuration file names
        function populateConfigDropdown(files) {
            fillDropdown(configDropdown, 'Select Configuration', files);
        }

        // Rebuild the background dropdown from the list sent by the server.
        // @param {string[]} files - background file names
        function populateBgDropdown(files) {
            fillDropdown(bgDropdown, 'Select Background', files);
        }

        // When a configuration is picked: pause the mic, interrupt whatever is running and ask the
        // server to switch. The "config-switched" reply (see handleMessage) restores the mic.
        configDropdown.addEventListener('change', function () {
            const selectedConfig = configDropdown.value;
            if (!selectedConfig) {
                return; // the placeholder entry was chosen
            }

            // Without an open socket the request would be lost and no "config-switched" reply
            // would ever arrive, leaving the mic stopped and the page stuck mid-switch.
            if (!ws || ws.readyState !== WebSocket.OPEN) {
                console.warn("Cannot switch configuration: WebSocket is not open");
                document.getElementById("message").textContent = "Cannot switch configuration: not connected to the server.";
                configDropdown.value = "";
                return;
            }

            setState("switching-config");
            document.getElementById("message").textContent = "Switching configuration...";
            // avoid the mic being on when switching config
            // (micStateBeforeConfigSwitch is an implicit global; handleMessage reads and resets it)
            micStateBeforeConfigSwitch = micToggleState;
            if (micToggleState) {
                stop_mic();
            }

            // Note: interrupt() moves the state on to "interrupted" until the server replies.
            interrupt();
            ws.send(JSON.stringify({ type: "switch-config", file: selectedConfig }));
        });

        // Use the chosen file under ./bg/ as the page background. Choosing the placeholder entry
        // leaves the current background untouched.
        bgDropdown.addEventListener('change', function () {
            const selectedBg = bgDropdown.value;
            if (selectedBg) {
                // Percent-encode each path segment so characters such as quotes, '#', '?', '%' or
                // a backslash in a file name cannot break the CSS url() token or the request URL.
                // NOTE(review): assumes the server sends raw (not already URL-encoded) names - confirm.
                const encodedPath = selectedBg.split('/').map(encodeURIComponent).join('/');
                document.body.style.backgroundImage = `url("./bg/${encodedPath}")`;
            }
        });

        // Apply an expression to model2 through its expression manager.
        // @param {int} expressionIndex - index of the expression; the value is run through
        //        parseInt() first, so numeric strings are accepted as well.
        //        (The original note says the indices come from the emotionMap in modelDict.js.)
        function setExpression(expressionIndex) {
            const expressionManager = model2.internalModel.motionManager.expressionManager;
            expressionIndex = parseInt(expressionIndex);
            expressionManager.setExpression(expressionIndex);
            console.info(`>> [x] -> Expression set to: (${expressionIndex})`);
        }

        // [Deprecated] Look for "[keyword]" tags in a string, one per key of `emoMap`, and queue
        // the matching expression on `taskQueue` for each tag found (case-insensitive on the
        // string side). `emoMap` is populated by listSupportedExpressions().
        // @param {string} str - the string to check
        function checkStringForExpression(str) {
            console.log("emo map: ", emoMap);
            Object.keys(emoMap).forEach((key) => {
                const tag = "[" + key + "]";
                if (!str.toLowerCase().includes(tag)) {
                    return; // this keyword does not occur in the string
                }
                console.info(">> [ ] <- add to exec queue: " + key + ", " + emoMap[key]);
                // The expression is looked up again when the task actually runs.
                taskQueue.addTask(() => { setExpression(emoMap[key]); });
                taskQueue.addTask(() => { console.log("timing out..."); });
            });
        }
        // [Deprecated] Copy the emotion map of model2's expression manager into the global
        // `emoMap` and log it. Nothing is returned.
        function listSupportedExpressions() {
            const expressionManager = model2.internalModel.motionManager.expressionManager;
            emoMap = expressionManager.emotionMap;
            console.log(emoMap);
        }



        // Set how far model2's mouth is open.
        // Two core-model APIs are supported: when setParameterValueById() exists it is used with
        // the 'ParamMouthOpenY' id, otherwise setParamFloat() is called with 'PARAM_MOUTH_OPEN_Y'
        // (presumably the older Cubism core - TODO confirm).
        // @param {number} mouthY - value written to the mouth-open parameter
        function setMouth(mouthY) {
            const coreModel = model2.internalModel.coreModel;
            if (typeof coreModel.setParameterValueById !== 'function') {
                coreModel.setParamFloat('PARAM_MOUTH_OPEN_Y', mouthY);
                return;
            }
            coreModel.setParameterValueById('ParamMouthOpenY', mouthY);
        }

        // Queue that plays the received sentences one after another.
        // (Left as an implicit global, as in the original, in case other scripts rely on it.)
        audioTaskQueue = new TaskQueue(100); // 100ms delay between tasks

        // Measure a sentence's audio and append a playback task for it to `audioTaskQueue`.
        // The task starts playback via playAudioLipSync() and then waits for the audio's duration
        // so that the next queued sentence does not start early.
        //
        // @param {string} audio_base64 - base64-encoded WAV data
        // @param volumes, slice_length - forwarded unchanged to playAudioLipSync()
        // @param {string|null} text - sentence being spoken, if any
        // @param {Array|null} expression_list - expressions for the sentence, if any
        //
        // NOTE(review): the task is only enqueued after the duration is known, so two calls made
        // close together could in principle be enqueued out of order - confirm whether that matters.
        async function addAudioTask(audio_base64, volumes, slice_length, text = null, expression_list = null) {
            console.log(`1. Adding audio task ${text} to queue`);

            // Must be local: as an implicit global it was overwritten by every later call, and a
            // task that ran afterwards waited for the wrong sentence's duration.
            const audioLength = await getAudioLength(audio_base64);
            console.log(`2. Audio length: ${audioLength}`);

            audioTaskQueue.addTask(async () => {
                playAudioLipSync(audio_base64, volumes, slice_length, text, expression_list);
                await new Promise(resolve => setTimeout(resolve, audioLength));
                console.log(`3. Audio task ${text} completed`);
            });
        }

        // Determine the duration of a base64-encoded WAV clip.
        // @param {string} audio_base64 - base64 WAV data (without the data: prefix)
        // @returns {Promise<number>} duration in milliseconds; 0 when the clip cannot be loaded
        //          or reports a non-finite duration, so callers waiting on the result never hang
        async function getAudioLength(audio_base64) {
            return new Promise((resolve) => {
                const audio = new Audio("data:audio/wav;base64," + audio_base64);
                audio.onloadedmetadata = () => {
                    const audioDur = audio.duration * 1000;
                    resolve(Number.isFinite(audioDur) ? audioDur : 0);
                };
                audio.onerror = () => {
                    // Without this the promise would stay pending forever for undecodable data.
                    console.error("Could not read audio metadata; treating the clip as zero-length");
                    resolve(0);
                };
            });
        }


        // Play one sentence through model2 with lip sync and show its text.
        // If the conversation has been interrupted, nothing is played and the remaining queued
        // audio is discarded instead.
        //
        // @param {string} audio_base64 - base64-encoded WAV data
        // @param volumes, slice_length - accepted for the caller's sake; not used in this function
        // @param {string|null} text - sentence being spoken; shown in #message and appended to
        //        `fullResponse` when present
        // @param {Array|null} expression_list - only the first entry is passed on to the model
        function playAudioLipSync(audio_base64, volumes, slice_length, text = null, expression_list = null) {

            if (state === "interrupted") {
                console.error("Audio playback blocked. Sentence:", text);
                audioTaskQueue.clearQueue();
                return;
            }

            if (text) {
                // Only real text is accumulated; appending a null text would add the word "null".
                fullResponse += text;
                document.getElementById("message").textContent = text;
            }

            const displayExpression = expression_list ? expression_list[0] : null;
            console.log("Start playing audio: ", text);
            model2.speak("data:audio/wav;base64," + audio_base64, { expression: displayExpression, resetExpression: false });
        }

        // Start the microphone. The VAD is created on first use; once it is running, detected
        // speech is sent to the server. When voice interruption is switched off, the VAD's
        // speech-end callback pauses the mic again after each utterance.
        // Failures (for example when the VAD cannot be created) are logged and reflected in the
        // UI instead of surfacing as an unhandled promise rejection.
        async function start_mic() {
            try {
                if (myvad == null) await init_vad();
                console.log("Mic start ");
                myvad.start();
            } catch (err) {
                console.error("Mic start failed:", err);
                micToggleState = false;
                micToggle.textContent = "❌Mic is off";
                document.getElementById("message").textContent = "Could not start the microphone.";
                return;
            }
            micToggleState = true;
            micToggle.textContent = "🎙️Mic is On";
        }

        // Pause the microphone and update the toggle button.
        // Safe to call before the VAD exists: `micToggleState` starts out true while `myvad` is
        // still undefined, so the first click on the mic button lands here with nothing to pause.
        function stop_mic() {
            console.log("Mic stop");
            if (myvad != null) {
                myvad.pause();
            }
            micToggleState = false;
            micToggle.textContent = "❌Mic is off";
        }

        // Toggle voice interruption and relabel the button to match the new setting.
        interruptBtn.addEventListener('click', () => {
            voiceInterruptionOn = !voiceInterruptionOn;
            if (voiceInterruptionOn) {
                interruptBtn.textContent = "🖐️Voice Interruption On";
            } else {
                interruptBtn.textContent = "❌Voice Interruption Off";
            }
        });

        // Whether the mic is considered on. start_mic() and stop_mic() keep it up to date; it
        // starts out true even though no VAD exists until start_mic() has run.
        let micToggleState = true;

        // The mic button flips between stopping and starting the microphone.
        micToggle.addEventListener('click', () => {
            if (micToggleState) {
                stop_mic();
            } else {
                start_mic();
            }
        });

        // Open the initial WebSocket connection as soon as this script runs, using the URL
        // currently held in the #wsUrl field. Clicking the #wsStatus button calls the same
        // function again later.
        connectWebSocket();
    </script>
</body>

</html>
